# replication data for 
# "Exploiting a Crisis: Abortion Activism and the COVID-19 Pandemic"
#
# Forthcoming in Perspectives on Politics
# Kate Hunt 
# huntkate@iu.edu
# Indiana University

# this script will load the data and conduct analyses that appear in that
# article

# variables:
#
# status_id: twitter status identifier for each tweet
# screen_name: twitter "handle" for organization
# country/country_cap: country identifier for organization
# stance/stance_cap: pro-/anti-abortion rights org. identifier
# covid_coded_all: identifier for tweets having covid content
# thematic: identifier for hand-coding of covid-related "themes"
#    2 = threat
#    3 = opportunity
#    4 = other (not used in paper)

# load needed packages
# library() stops immediately with an error when a package is missing;
# require() would only warn and return FALSE, letting the script fail
# later with a less helpful message
library(tidyverse)
library(lubridate)
library(rtweet)


# load the replication dataset - edit the path below so that it points to
# the CSV file on your machine
rep_data <- rtweet::read_twitter_csv(
  "persp_replication_data.csv"
)

#
# "hydrate" tweets (optional)
#
# Recovering the full tweet details requires access to the Twitter API.
# This step is not needed to run the article replications.
# For example:
# tweets <- rtweet::lookup_tweets(rep_data$status_id)

# number of tweets that fall inside the study window
rep_data %>%
  filter(
    created_at >= "2020-01-01",
    created_at <= "2020-08-01"
  ) %>%
  tally()

# lookup table used to attach display labels and a facet ordering to each
# country/stance combination
#
# the five country values are cycled twice while the two stance values are
# cycled five times; because the two cycle lengths (5 and 2) share no common
# factor, the ten rows cover every country/stance pair exactly once
label_reorder <- tibble(
  country_order = rep(c(3, 1, 2, 5, 4), times = 2),
  country = rep(c("canada", "ireland", "ni", "us", "us-fed"), times = 2),
  stance = rep_len(c("pro-choice", "pro-life"), 10),
  country_cap = rep(
    c("Canada", "Rep. Ireland", "N. Ireland", "US-States", "US-Federal"),
    times = 2
  ),
  stance_cap = rep_len(c("Pro-Choice", "Pro-Life"), 10)
)

# table 1
# tweets per organization (with its country and stance) in the study window;
# n = 30 makes print() show up to 30 rows instead of the default truncation
rep_data %>%
  filter(
    created_at >= "2020-01-01",
    created_at <= "2020-08-01"
  ) %>%
  count(country, screen_name, stance) %>%
  print(n = 30)


# Figure 1 - Deviation from Average
#
# For every country/stance group, count tweets per week and express each
# weekly count as a z-score relative to that group's own mean and standard
# deviation of weekly counts over the study window.
#
# NOTE(review): the earlier version joined the group means/sds onto weekly
# counts taken from an object called `covid_compare`, which is not defined
# anywhere in this script, so the figure could not be reproduced. The weekly
# counts are now taken from `rep_data` (the same source as the means/sds) and
# everything is computed in one pass - confirm `covid_compare` held the same
# tweets.
rep_data %>%
  mutate(created_at = ymd_hms(created_at)) %>%
  filter(created_at >= "2020-01-01" & created_at < "2020-08-01") %>%
  # bucket each tweet into its calendar week
  mutate(weekly = floor_date(created_at, "weeks")) %>%
  # one row per week/country/stance; n = tweets that week
  count(weekly, country, stance) %>%
  # standardize weekly counts within each country/stance group
  group_by(country, stance) %>%
  mutate(avg_tweets = mean(n),
         sd_tweets = sd(n),
         z = (n - avg_tweets) / sd_tweets) %>%
  ungroup() %>%
  # attach display labels and facet ordering
  left_join(label_reorder, by = c("country", "stance")) %>%
  mutate(stance_cap = case_when(
           stance_cap == "Pro-Choice" ~ "Pro-Abortion Rights",
           stance_cap == "Pro-Life" ~ "Anti-Abortion Rights",
           TRUE ~ NA_character_
         ),
         # helper used only to control the order of the linetype levels
         stance_ord = ifelse(grepl("Anti", stance_cap), 1, 2)) %>%
  ggplot(aes(weekly, z)) +
    geom_line(aes(linetype = reorder(stance_cap, -stance_ord))) +
    facet_grid(reorder(country_cap, country_order) ~ .) +
    theme_minimal() +
    theme(legend.title = element_blank()) +
    xlab("") +
    ylab("Z-score") +
    ggtitle("Figure 1. Tweeting Deviation from Average") +
    scale_linetype_discrete(breaks = c("Pro-Abortion Rights",
                                       "Anti-Abortion Rights"))



# Figure 2 - Covid usage over time
# daily number of covid-coded tweets for each country/stance group, drawn as
# one panel per country with a separate linetype per stance
rep_data %>%
  filter(covid_coded_all == 1) %>%
  mutate(date = as_date(created_at)) %>%
  count(date, country, stance) %>%
  filter(country != "uk", date < "2020-08-01") %>%
  left_join(., label_reorder) %>%
  mutate(
    stance_cap = if_else(
      stance_cap == "Pro-Choice",
      "Pro-Abortion Rights",
      "Anti-Abortion Rights"
    ),
    # helper used only to control the order of the linetype levels
    stance_ord = if_else(grepl("Anti", stance_cap), 1, 2)
  ) %>%
  ggplot(aes(date, n)) +
  geom_line(aes(linetype = reorder(stance_cap, -stance_ord))) +
  facet_grid(reorder(country_cap, country_order) ~ .) +
  scale_y_continuous(limits = c(0, 12), breaks = seq(0, 12, 4)) +
  labs(x = "", y = "", title = "Figure 2: COVID-19 Tweets") +
  theme_minimal() +
  theme(legend.title = element_blank())

# table 2
# for each country/stance group: total tweets, covid-coded tweets, and the
# share of tweets that are covid-coded, from March 2020 onward; within each
# country the group(s) with the highest share are flagged with "*"
rep_data %>%
  filter(created_at >= "2020-03-01" & created_at <= "2020-08-01") %>%
  group_by(country, stance) %>%
  summarise(tweets = n(),
            # spelled-out TRUE: the shorthand T is an ordinary variable that
            # can be reassigned
            covid_tweets = sum(covid_coded_all, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(covid_proportion = covid_tweets / tweets) %>%
  group_by(country) %>%
  # exact equality is safe here: max() returns one of the compared values
  mutate(largest = ifelse(covid_proportion == max(covid_proportion),
                          "*", "")) %>%
  # drop the country grouping so the result is a plain table
  ungroup()


# table 3
# share of covid-coded tweets falling under each hand-coded theme, by
# country and stance; percentages are taken over ALL themes in a group
# (including the ones dropped afterwards), then only "Threat" and
# "Opportunity" rows are kept
rep_data %>%
  filter(
    covid_coded_all != 0,
    created_at >= "2020-03-01",
    created_at <= "2020-08-01"
  ) %>%
  count(country, stance, thematic) %>%
  group_by(country, stance) %>%
  mutate(perc = 100 * (n / sum(n))) %>%
  ungroup() %>%
  mutate(
    thematic = ifelse(
      thematic == 2,
      "Threat",
      ifelse(thematic == 3, "Opportunity", NA)
    )
  ) %>%
  drop_na(thematic)


